{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dense Sentiment Classifier"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In this notebook, we build a dense neural net to classify IMDB movie reviews by their sentiment."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Load dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "import keras\n",
    "from keras.datasets import imdb\n",
    "from keras.preprocessing.sequence import pad_sequences\n",
    "from keras.models import Sequential\n",
    "from keras.layers import Dense, Flatten, Dropout\n",
    "from keras.layers import Embedding # new!\n",
    "from keras.callbacks import ModelCheckpoint # new! \n",
    "import os # new! \n",
    "from sklearn.metrics import roc_auc_score, roc_curve # new!\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt # new!\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Set hyperparameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# output directory name:\n",
    "output_dir = 'model_output/dense'\n",
    "\n",
    "# training:\n",
    "epochs = 4\n",
    "batch_size = 128\n",
    "\n",
    "# vector-space embedding: \n",
    "n_dim = 64\n",
    "n_unique_words = 5000 # as per Maas et al. (2011); may not be optimal\n",
    "n_words_to_skip = 50 # ditto\n",
    "max_review_length = 100\n",
    "pad_type = trunc_type = 'pre'\n",
    "\n",
    "# neural network architecture: \n",
    "n_dense = 64\n",
    "dropout = 0.5"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Load data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For a given data set: \n",
    "\n",
    "* the Keras text utilities [here](https://keras.io/preprocessing/text/) quickly preprocess natural language and convert it into an index\n",
    "* the `keras.preprocessing.text.Tokenizer` class may do everything you need in one line:\n",
    "    * tokenize into words or characters\n",
    "    * `num_words`: maximum unique tokens\n",
    "    * filter out punctuation\n",
    "    * lower case\n",
    "    * convert words to an integer index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "(x_train, y_train), (x_valid, y_valid) = imdb.load_data(num_words=n_unique_words, skip_top=n_words_to_skip) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ [2, 2, 2, 2, 2, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 2, 173, 2, 256, 2, 2, 100, 2, 838, 112, 50, 670, 2, 2, 2, 480, 284, 2, 150, 2, 172, 112, 167, 2, 336, 385, 2, 2, 172, 4536, 1111, 2, 546, 2, 2, 447, 2, 192, 50, 2, 2, 147, 2025, 2, 2, 2, 2, 1920, 4613, 469, 2, 2, 71, 87, 2, 2, 2, 530, 2, 76, 2, 2, 1247, 2, 2, 2, 515, 2, 2, 2, 626, 2, 2, 2, 62, 386, 2, 2, 316, 2, 106, 2, 2, 2223, 2, 2, 480, 66, 3785, 2, 2, 130, 2, 2, 2, 619, 2, 2, 124, 51, 2, 135, 2, 2, 1415, 2, 2, 2, 2, 215, 2, 77, 52, 2, 2, 407, 2, 82, 2, 2, 2, 107, 117, 2, 2, 256, 2, 2, 2, 3766, 2, 723, 2, 71, 2, 530, 476, 2, 400, 317, 2, 2, 2, 2, 1029, 2, 104, 88, 2, 381, 2, 297, 98, 2, 2071, 56, 2, 141, 2, 194, 2, 2, 2, 226, 2, 2, 134, 476, 2, 480, 2, 144, 2, 2, 2, 51, 2, 2, 224, 92, 2, 104, 2, 226, 65, 2, 2, 1334, 88, 2, 2, 283, 2, 2, 4472, 113, 103, 2, 2, 2, 2, 2, 178, 2],\n",
       "       [2, 194, 1153, 194, 2, 78, 228, 2, 2, 1463, 4369, 2, 134, 2, 2, 715, 2, 118, 1634, 2, 394, 2, 2, 119, 954, 189, 102, 2, 207, 110, 3103, 2, 2, 69, 188, 2, 2, 2, 2, 2, 249, 126, 93, 2, 114, 2, 2300, 1523, 2, 647, 2, 116, 2, 2, 2, 2, 229, 2, 340, 1322, 2, 118, 2, 2, 130, 4901, 2, 2, 1002, 2, 89, 2, 952, 2, 2, 2, 455, 2, 2, 2, 2, 1543, 1905, 398, 2, 1649, 2, 2, 2, 163, 2, 3215, 2, 2, 1153, 2, 194, 775, 2, 2, 2, 349, 2637, 148, 605, 2, 2, 2, 123, 125, 68, 2, 2, 2, 349, 165, 4362, 98, 2, 2, 228, 2, 2, 2, 1157, 2, 299, 120, 2, 120, 174, 2, 220, 175, 136, 50, 2, 4373, 228, 2, 2, 2, 656, 245, 2350, 2, 2, 2, 131, 152, 491, 2, 2, 2, 2, 1212, 2, 2, 2, 371, 78, 2, 625, 64, 1382, 2, 2, 168, 145, 2, 2, 1690, 2, 2, 2, 1355, 2, 2, 2, 52, 154, 462, 2, 89, 78, 285, 2, 145, 95],\n",
       "       [2, 2, 2, 2, 2, 2, 2, 2, 249, 108, 2, 2, 2, 54, 61, 369, 2, 71, 149, 2, 2, 112, 2, 2401, 311, 2, 2, 3711, 2, 75, 2, 1829, 296, 2, 86, 320, 2, 534, 2, 263, 4821, 1301, 2, 1873, 2, 89, 78, 2, 66, 2, 2, 360, 2, 2, 58, 316, 334, 2, 2, 1716, 2, 645, 662, 2, 257, 85, 1200, 2, 1228, 2578, 83, 68, 3912, 2, 2, 165, 1539, 278, 2, 69, 2, 780, 2, 106, 2, 2, 1338, 2, 2, 2, 2, 215, 2, 610, 2, 2, 87, 326, 2, 2300, 2, 2, 2, 2, 272, 2, 57, 2, 2, 2, 2, 2, 2, 2307, 51, 2, 170, 2, 595, 116, 595, 1352, 2, 191, 79, 638, 89, 2, 2, 2, 2, 106, 607, 624, 2, 534, 2, 227, 2, 129, 113],\n",
       "       [2, 2, 2, 2, 2, 2804, 2, 2040, 432, 111, 153, 103, 2, 1494, 2, 70, 131, 67, 2, 61, 2, 744, 2, 3715, 761, 61, 2, 452, 2, 2, 985, 2, 2, 59, 166, 2, 105, 216, 1239, 2, 1797, 2, 2, 2, 2, 744, 2413, 2, 2, 2, 687, 2, 2, 2, 2, 2, 3693, 2, 2, 2, 121, 59, 456, 2, 2, 2, 265, 2, 575, 111, 153, 159, 59, 2, 1447, 2, 2, 586, 482, 2, 2, 96, 59, 716, 2, 2, 172, 65, 2, 579, 2, 2, 2, 1615, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 464, 2, 314, 2, 2, 2, 719, 605, 2, 2, 202, 2, 310, 2, 3772, 3501, 2, 2722, 58, 2, 2, 537, 2116, 180, 2, 2, 413, 173, 2, 263, 112, 2, 152, 377, 2, 537, 263, 846, 579, 178, 54, 75, 71, 476, 2, 413, 263, 2504, 182, 2, 2, 75, 2306, 922, 2, 279, 131, 2895, 2, 2867, 2, 2, 2, 921, 2, 192, 2, 1219, 3890, 2, 2, 217, 4122, 1710, 537, 2, 1236, 2, 736, 2, 2, 61, 403, 2, 2, 2, 61, 4494, 2, 2, 4494, 159, 90, 263, 2311, 4319, 309, 2, 178, 2, 82, 4319, 2, 65, 2, 2, 145, 143, 2, 2, 2, 537, 746, 537, 537, 2, 2, 2, 2, 594, 2, 2, 94, 2, 3987, 2, 2, 2, 2, 538, 2, 1795, 246, 2, 2, 2, 2, 635, 2, 2, 51, 408, 2, 94, 318, 1382, 2, 2, 2, 2683, 936, 2, 2, 2, 2, 2, 2, 2, 1885, 2, 1118, 2, 80, 126, 842, 2, 2, 2, 2, 4726, 2, 4494, 2, 1550, 3633, 159, 2, 341, 2, 2733, 2, 4185, 173, 2, 90, 2, 2, 2, 2, 2, 1784, 86, 1117, 2, 3261, 2, 2, 2, 2, 2, 2, 2841, 2, 2, 1010, 2, 793, 2, 2, 1386, 1830, 2, 2, 246, 50, 2, 2, 2750, 1944, 746, 90, 2, 2, 2, 124, 2, 882, 2, 882, 496, 2, 2, 2213, 537, 121, 127, 1219, 130, 2, 2, 494, 2, 124, 2, 882, 496, 2, 341, 2, 2, 846, 2, 2, 2, 2, 1906, 2, 97, 2, 236, 2, 1311, 2, 2, 2, 2, 2, 2, 2, 91, 2, 3987, 70, 2, 882, 2, 579, 2, 2, 2, 2, 2, 537, 2, 2, 2, 2, 65, 2, 537, 75, 2, 1775, 3353, 2, 1846, 2, 2, 2, 154, 2, 2, 518, 53, 2, 2, 2, 3211, 882, 2, 399, 2, 75, 257, 3807, 2, 2, 2, 2, 456, 2, 65, 2, 2, 205, 113, 2, 2, 2, 2, 2, 2, 2, 242, 2, 91, 1202, 2, 2, 2070, 307, 2, 2, 2, 126, 93, 2, 2, 2, 188, 1076, 3222, 2, 2, 2, 2, 2348, 537, 2, 53, 537, 2, 82, 2, 2, 2, 2, 2, 280, 2, 219, 2, 2, 431, 758, 859, 2, 953, 1052, 2, 2, 2, 2, 94, 2, 2, 238, 60, 2, 2, 2, 804, 2, 2, 2, 2, 132, 2, 67, 2, 2, 2, 2, 283, 2, 2, 2, 2, 2, 242, 955, 2, 2, 279, 2, 2, 2, 1685, 195, 2, 238, 60, 796, 2, 2, 671, 2, 2804, 2, 2, 559, 154, 888, 2, 726, 50, 2, 2, 2, 2, 566, 2, 579, 2, 64, 2574],\n",
       "       [2, 249, 1323, 2, 61, 113, 2, 2, 2, 1637, 2, 2, 56, 2, 2401, 2, 457, 88, 2, 2626, 1400, 2, 3171, 2, 70, 79, 2, 706, 919, 2, 2, 355, 340, 355, 1696, 96, 143, 2, 2, 2, 289, 2, 61, 369, 71, 2359, 2, 2, 2, 131, 2073, 249, 114, 249, 229, 249, 2, 2, 2, 126, 110, 2, 473, 2, 569, 61, 419, 56, 429, 2, 1513, 2, 2, 534, 95, 474, 570, 2, 2, 124, 138, 88, 2, 421, 1543, 52, 725, 2, 61, 419, 2, 2, 1571, 2, 1543, 2, 2, 2, 2, 2, 296, 2, 3524, 2, 2, 421, 128, 74, 233, 334, 207, 126, 224, 2, 562, 298, 2167, 1272, 2, 2601, 2, 516, 988, 2, 2, 79, 120, 2, 595, 2, 784, 2, 3171, 2, 165, 170, 143, 2, 2, 2, 2, 2, 226, 251, 2, 61, 113],\n",
       "       [2, 778, 128, 74, 2, 630, 163, 2, 2, 1766, 2, 1051, 2, 2, 85, 156, 2, 2, 148, 139, 121, 664, 665, 2, 2, 1361, 173, 2, 749, 2, 2, 3804, 2, 2, 226, 65, 2, 2, 127, 2, 2, 2, 2]], dtype=object)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x_train[0:6] # 0 reserved for padding; 1 would be starting character; 2 is unknown; 3 is most common word, etc."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "218\n",
      "189\n",
      "141\n",
      "550\n",
      "147\n",
      "43\n"
     ]
    }
   ],
   "source": [
    "for x in x_train[0:6]:\n",
    "    print(len(x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 0, 0, 1, 0, 0])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_train[0:6]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(25000, 25000)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(x_train), len(x_valid)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Restoring words from index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "word_index = keras.datasets.imdb.get_word_index()\n",
    "word_index = {k:(v+3) for k,v in word_index.items()}\n",
    "word_index[\"PAD\"] = 0\n",
    "word_index[\"START\"] = 1\n",
    "word_index[\"UNK\"] = 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'fawn': 34704,\n",
       " 'tsukino': 52009,\n",
       " 'nunnery': 52010,\n",
       " 'sonja': 16819,\n",
       " 'vani': 63954,\n",
       " 'woods': 1411,\n",
       " 'spiders': 16118,\n",
       " 'hanging': 2348,\n",
       " 'woody': 2292,\n",
       " 'trawling': 52011,\n",
       " \"hold's\": 52012,\n",
       " 'comically': 11310,\n",
       " 'localized': 40833,\n",
       " 'disobeying': 30571,\n",
       " \"'royale\": 52013,\n",
       " \"harpo's\": 40834,\n",
       " 'canet': 52014,\n",
       " 'aileen': 19316,\n",
       " 'acurately': 52015,\n",
       " \"diplomat's\": 52016,\n",
       " 'rickman': 25245,\n",
       " 'arranged': 6749,\n",
       " 'rumbustious': 52017,\n",
       " 'familiarness': 52018,\n",
       " \"spider'\": 52019,\n",
       " 'hahahah': 68807,\n",
       " \"wood'\": 52020,\n",
       " 'transvestism': 40836,\n",
       " \"hangin'\": 34705,\n",
       " 'bringing': 2341,\n",
       " 'seamier': 40837,\n",
       " 'wooded': 34706,\n",
       " 'bravora': 52021,\n",
       " 'grueling': 16820,\n",
       " 'wooden': 1639,\n",
       " 'wednesday': 16821,\n",
       " \"'prix\": 52022,\n",
       " 'altagracia': 34707,\n",
       " 'circuitry': 52023,\n",
       " 'crotch': 11588,\n",
       " 'busybody': 57769,\n",
       " \"tart'n'tangy\": 52024,\n",
       " 'burgade': 14132,\n",
       " 'thrace': 52026,\n",
       " \"tom's\": 11041,\n",
       " 'snuggles': 52028,\n",
       " 'francesco': 29117,\n",
       " 'complainers': 52030,\n",
       " 'templarios': 52128,\n",
       " '272': 40838,\n",
       " '273': 52031,\n",
       " 'zaniacs': 52133,\n",
       " '275': 34709,\n",
       " 'consenting': 27634,\n",
       " 'snuggled': 40839,\n",
       " 'inanimate': 15495,\n",
       " 'uality': 52033,\n",
       " 'bronte': 11929,\n",
       " 'errors': 4013,\n",
       " 'dialogs': 3233,\n",
       " \"yomada's\": 52034,\n",
       " \"madman's\": 34710,\n",
       " 'dialoge': 30588,\n",
       " 'usenet': 52036,\n",
       " 'videodrome': 40840,\n",
       " \"kid'\": 26341,\n",
       " 'pawed': 52037,\n",
       " \"'girlfriend'\": 30572,\n",
       " \"'pleasure\": 52038,\n",
       " \"'reloaded'\": 52039,\n",
       " \"kazakos'\": 40842,\n",
       " 'rocque': 52040,\n",
       " 'mailings': 52041,\n",
       " 'brainwashed': 11930,\n",
       " 'mcanally': 16822,\n",
       " \"tom''\": 52042,\n",
       " 'kurupt': 25246,\n",
       " 'affiliated': 21908,\n",
       " 'babaganoosh': 52043,\n",
       " \"noe's\": 40843,\n",
       " 'quart': 40844,\n",
       " 'kids': 362,\n",
       " 'uplifting': 5037,\n",
       " 'controversy': 7096,\n",
       " 'kida': 21909,\n",
       " 'kidd': 23382,\n",
       " \"error'\": 52044,\n",
       " 'neurologist': 52045,\n",
       " 'spotty': 18513,\n",
       " 'cobblers': 30573,\n",
       " 'projection': 9881,\n",
       " 'fastforwarding': 40845,\n",
       " 'sters': 52046,\n",
       " \"eggar's\": 52047,\n",
       " 'etherything': 52048,\n",
       " 'gateshead': 40846,\n",
       " 'airball': 34711,\n",
       " 'unsinkable': 25247,\n",
       " 'stern': 7183,\n",
       " \"cervi's\": 52049,\n",
       " 'dnd': 40847,\n",
       " 'dna': 11589,\n",
       " 'insecurity': 20601,\n",
       " \"'reboot'\": 52050,\n",
       " 'trelkovsky': 11040,\n",
       " 'jaekel': 52051,\n",
       " 'sidebars': 52052,\n",
       " \"sforza's\": 52053,\n",
       " 'distortions': 17636,\n",
       " 'mutinies': 52054,\n",
       " 'sermons': 30605,\n",
       " '7ft': 40849,\n",
       " 'boobage': 52055,\n",
       " \"o'bannon's\": 52056,\n",
       " 'populations': 23383,\n",
       " 'chulak': 52057,\n",
       " 'mesmerize': 27636,\n",
       " 'quinnell': 52058,\n",
       " 'yahoo': 10310,\n",
       " 'meteorologist': 52060,\n",
       " 'beswick': 42580,\n",
       " 'boorman': 15496,\n",
       " 'voicework': 40850,\n",
       " \"ster'\": 52061,\n",
       " 'blustering': 22925,\n",
       " 'hj': 52062,\n",
       " 'intake': 27637,\n",
       " 'morally': 5624,\n",
       " 'jumbling': 40852,\n",
       " 'bowersock': 52063,\n",
       " \"'porky's'\": 52064,\n",
       " 'gershon': 16824,\n",
       " 'ludicrosity': 40853,\n",
       " 'coprophilia': 52065,\n",
       " 'expressively': 40854,\n",
       " \"india's\": 19503,\n",
       " \"post's\": 34713,\n",
       " 'wana': 52066,\n",
       " 'wang': 5286,\n",
       " 'wand': 30574,\n",
       " 'wane': 25248,\n",
       " 'edgeways': 52324,\n",
       " 'titanium': 34714,\n",
       " 'pinta': 40855,\n",
       " 'want': 181,\n",
       " 'pinto': 30575,\n",
       " 'whoopdedoodles': 52068,\n",
       " 'tchaikovsky': 21911,\n",
       " 'travel': 2106,\n",
       " \"'victory'\": 52069,\n",
       " 'copious': 11931,\n",
       " 'gouge': 22436,\n",
       " \"chapters'\": 52070,\n",
       " 'barbra': 6705,\n",
       " 'uselessness': 30576,\n",
       " \"wan'\": 52071,\n",
       " 'assimilated': 27638,\n",
       " 'petiot': 16119,\n",
       " 'most\\x85and': 52072,\n",
       " 'dinosaurs': 3933,\n",
       " 'wrong': 355,\n",
       " 'seda': 52073,\n",
       " 'stollen': 52074,\n",
       " 'sentencing': 34715,\n",
       " 'ouroboros': 40856,\n",
       " 'assimilates': 40857,\n",
       " 'colorfully': 40858,\n",
       " 'glenne': 27639,\n",
       " 'dongen': 52075,\n",
       " 'subplots': 4763,\n",
       " 'kiloton': 52076,\n",
       " 'chandon': 23384,\n",
       " \"effect'\": 34716,\n",
       " 'snugly': 27640,\n",
       " 'kuei': 40859,\n",
       " 'welcomed': 9095,\n",
       " 'dishonor': 30074,\n",
       " 'concurrence': 52078,\n",
       " 'stoicism': 23385,\n",
       " \"guys'\": 14899,\n",
       " \"beroemd'\": 52080,\n",
       " 'butcher': 6706,\n",
       " \"melfi's\": 40860,\n",
       " 'aargh': 30626,\n",
       " 'playhouse': 20602,\n",
       " 'wickedly': 11311,\n",
       " 'fit': 1183,\n",
       " 'labratory': 52081,\n",
       " 'lifeline': 40862,\n",
       " 'screaming': 1930,\n",
       " 'fix': 4290,\n",
       " 'cineliterate': 52082,\n",
       " 'fic': 52083,\n",
       " 'fia': 52084,\n",
       " 'fig': 34717,\n",
       " 'fmvs': 52085,\n",
       " 'fie': 52086,\n",
       " 'reentered': 52087,\n",
       " 'fin': 30577,\n",
       " 'doctresses': 52088,\n",
       " 'fil': 52089,\n",
       " 'zucker': 12609,\n",
       " 'ached': 31934,\n",
       " 'counsil': 52091,\n",
       " 'paterfamilias': 52092,\n",
       " 'songwriter': 13888,\n",
       " 'shivam': 34718,\n",
       " 'hurting': 9657,\n",
       " 'effects': 302,\n",
       " 'slauther': 52093,\n",
       " \"'flame'\": 52094,\n",
       " 'sommerset': 52095,\n",
       " 'interwhined': 52096,\n",
       " 'whacking': 27641,\n",
       " 'bartok': 52097,\n",
       " 'barton': 8778,\n",
       " 'frewer': 21912,\n",
       " \"fi'\": 52098,\n",
       " 'ingrid': 6195,\n",
       " 'stribor': 30578,\n",
       " 'approporiately': 52099,\n",
       " 'wobblyhand': 52100,\n",
       " 'tantalisingly': 52101,\n",
       " 'ankylosaurus': 52102,\n",
       " 'parasites': 17637,\n",
       " 'childen': 52103,\n",
       " \"jenkins'\": 52104,\n",
       " 'metafiction': 52105,\n",
       " 'golem': 17638,\n",
       " 'indiscretion': 40863,\n",
       " \"reeves'\": 23386,\n",
       " \"inamorata's\": 57784,\n",
       " 'brittannica': 52107,\n",
       " 'adapt': 7919,\n",
       " \"russo's\": 30579,\n",
       " 'guitarists': 48249,\n",
       " 'abbott': 10556,\n",
       " 'abbots': 40864,\n",
       " 'lanisha': 17652,\n",
       " 'magickal': 40866,\n",
       " 'mattter': 52108,\n",
       " \"'willy\": 52109,\n",
       " 'pumpkins': 34719,\n",
       " 'stuntpeople': 52110,\n",
       " 'estimate': 30580,\n",
       " 'ugghhh': 40867,\n",
       " 'gameplay': 11312,\n",
       " \"wern't\": 52111,\n",
       " \"n'sync\": 40868,\n",
       " 'sickeningly': 16120,\n",
       " 'chiara': 40869,\n",
       " 'disturbed': 4014,\n",
       " 'portmanteau': 40870,\n",
       " 'ineffectively': 52112,\n",
       " \"duchonvey's\": 82146,\n",
       " \"nasty'\": 37522,\n",
       " 'purpose': 1288,\n",
       " 'lazers': 52115,\n",
       " 'lightened': 28108,\n",
       " 'kaliganj': 52116,\n",
       " 'popularism': 52117,\n",
       " \"damme's\": 18514,\n",
       " 'stylistics': 30581,\n",
       " 'mindgaming': 52118,\n",
       " 'spoilerish': 46452,\n",
       " \"'corny'\": 52120,\n",
       " 'boerner': 34721,\n",
       " 'olds': 6795,\n",
       " 'bakelite': 52121,\n",
       " 'renovated': 27642,\n",
       " 'forrester': 27643,\n",
       " \"lumiere's\": 52122,\n",
       " 'gaskets': 52027,\n",
       " 'needed': 887,\n",
       " 'smight': 34722,\n",
       " 'master': 1300,\n",
       " \"edie's\": 25908,\n",
       " 'seeber': 40871,\n",
       " 'hiya': 52123,\n",
       " 'fuzziness': 52124,\n",
       " 'genesis': 14900,\n",
       " 'rewards': 12610,\n",
       " 'enthrall': 30582,\n",
       " \"'about\": 40872,\n",
       " \"recollection's\": 52125,\n",
       " 'mutilated': 11042,\n",
       " 'fatherlands': 52126,\n",
       " \"fischer's\": 52127,\n",
       " 'positively': 5402,\n",
       " '270': 34708,\n",
       " 'ahmed': 34723,\n",
       " 'zatoichi': 9839,\n",
       " 'bannister': 13889,\n",
       " 'anniversaries': 52130,\n",
       " \"helm's\": 30583,\n",
       " \"'work'\": 52131,\n",
       " 'exclaimed': 34724,\n",
       " \"'unfunny'\": 52132,\n",
       " '274': 52032,\n",
       " 'feeling': 547,\n",
       " \"wanda's\": 52134,\n",
       " 'dolan': 33269,\n",
       " '278': 52136,\n",
       " 'peacoat': 52137,\n",
       " 'brawny': 40873,\n",
       " 'mishra': 40874,\n",
       " 'worlders': 40875,\n",
       " 'protags': 52138,\n",
       " 'skullcap': 52139,\n",
       " 'dastagir': 57599,\n",
       " 'affairs': 5625,\n",
       " 'wholesome': 7802,\n",
       " 'hymen': 52140,\n",
       " 'paramedics': 25249,\n",
       " 'unpersons': 52141,\n",
       " 'heavyarms': 52142,\n",
       " 'affaire': 52143,\n",
       " 'coulisses': 52144,\n",
       " 'hymer': 40876,\n",
       " 'kremlin': 52145,\n",
       " 'shipments': 30584,\n",
       " 'pixilated': 52146,\n",
       " \"'00s\": 30585,\n",
       " 'diminishing': 18515,\n",
       " 'cinematic': 1360,\n",
       " 'resonates': 14901,\n",
       " 'simplify': 40877,\n",
       " \"nature'\": 40878,\n",
       " 'temptresses': 40879,\n",
       " 'reverence': 16825,\n",
       " 'resonated': 19505,\n",
       " 'dailey': 34725,\n",
       " '2\\x85': 52147,\n",
       " 'treize': 27644,\n",
       " 'majo': 52148,\n",
       " 'kiya': 21913,\n",
       " 'woolnough': 52149,\n",
       " 'thanatos': 39800,\n",
       " 'sandoval': 35734,\n",
       " 'dorama': 40882,\n",
       " \"o'shaughnessy\": 52150,\n",
       " 'tech': 4991,\n",
       " 'fugitives': 32021,\n",
       " 'teck': 30586,\n",
       " \"'e'\": 76128,\n",
       " 'doesn’t': 40884,\n",
       " 'purged': 52152,\n",
       " 'saying': 660,\n",
       " \"martians'\": 41098,\n",
       " 'norliss': 23421,\n",
       " 'dickey': 27645,\n",
       " 'dicker': 52155,\n",
       " \"'sependipity\": 52156,\n",
       " 'padded': 8425,\n",
       " 'ordell': 57795,\n",
       " \"sturges'\": 40885,\n",
       " 'independentcritics': 52157,\n",
       " 'tempted': 5748,\n",
       " \"atkinson's\": 34727,\n",
       " 'hounded': 25250,\n",
       " 'apace': 52158,\n",
       " 'clicked': 15497,\n",
       " \"'humor'\": 30587,\n",
       " \"martino's\": 17180,\n",
       " \"'supporting\": 52159,\n",
       " 'warmongering': 52035,\n",
       " \"zemeckis's\": 34728,\n",
       " 'lube': 21914,\n",
       " 'shocky': 52160,\n",
       " 'plate': 7479,\n",
       " 'plata': 40886,\n",
       " 'sturgess': 40887,\n",
       " \"nerds'\": 40888,\n",
       " 'plato': 20603,\n",
       " 'plath': 34729,\n",
       " 'platt': 40889,\n",
       " 'mcnab': 52162,\n",
       " 'clumsiness': 27646,\n",
       " 'altogether': 3902,\n",
       " 'massacring': 42587,\n",
       " 'bicenntinial': 52163,\n",
       " 'skaal': 40890,\n",
       " 'droning': 14363,\n",
       " 'lds': 8779,\n",
       " 'jaguar': 21915,\n",
       " \"cale's\": 34730,\n",
       " 'nicely': 1780,\n",
       " 'mummy': 4591,\n",
       " \"lot's\": 18516,\n",
       " 'patch': 10089,\n",
       " 'kerkhof': 50205,\n",
       " \"leader's\": 52164,\n",
       " \"'movie\": 27647,\n",
       " 'uncomfirmed': 52165,\n",
       " 'heirloom': 40891,\n",
       " 'wrangle': 47363,\n",
       " 'emotion\\x85': 52166,\n",
       " \"'stargate'\": 52167,\n",
       " 'pinoy': 40892,\n",
       " 'conchatta': 40893,\n",
       " 'broeke': 41131,\n",
       " 'advisedly': 40894,\n",
       " \"barker's\": 17639,\n",
       " 'descours': 52169,\n",
       " 'lots': 775,\n",
       " 'lotr': 9262,\n",
       " 'irs': 9882,\n",
       " 'lott': 52170,\n",
       " 'xvi': 40895,\n",
       " 'irk': 34731,\n",
       " 'irl': 52171,\n",
       " 'ira': 6890,\n",
       " 'belzer': 21916,\n",
       " 'irc': 52172,\n",
       " 'ire': 27648,\n",
       " 'requisites': 40896,\n",
       " 'discipline': 7696,\n",
       " 'lyoko': 52964,\n",
       " 'extend': 11313,\n",
       " 'nature': 876,\n",
       " \"'dickie'\": 52173,\n",
       " 'optimist': 40897,\n",
       " 'lapping': 30589,\n",
       " 'superficial': 3903,\n",
       " 'vestment': 52174,\n",
       " 'extent': 2826,\n",
       " 'tendons': 52175,\n",
       " \"heller's\": 52176,\n",
       " 'quagmires': 52177,\n",
       " 'miyako': 52178,\n",
       " 'moocow': 20604,\n",
       " \"coles'\": 52179,\n",
       " 'lookit': 40898,\n",
       " 'ravenously': 52180,\n",
       " 'levitating': 40899,\n",
       " 'perfunctorily': 52181,\n",
       " 'lookin': 30590,\n",
       " \"lot'\": 40901,\n",
       " 'lookie': 52182,\n",
       " 'fearlessly': 34873,\n",
       " 'libyan': 52184,\n",
       " 'fondles': 40902,\n",
       " 'gopher': 35717,\n",
       " 'wearying': 40904,\n",
       " \"nz's\": 52185,\n",
       " 'minuses': 27649,\n",
       " 'puposelessly': 52186,\n",
       " 'shandling': 52187,\n",
       " 'decapitates': 31271,\n",
       " 'humming': 11932,\n",
       " \"'nother\": 40905,\n",
       " 'smackdown': 21917,\n",
       " 'underdone': 30591,\n",
       " 'frf': 40906,\n",
       " 'triviality': 52188,\n",
       " 'fro': 25251,\n",
       " 'bothers': 8780,\n",
       " \"'kensington\": 52189,\n",
       " 'much': 76,\n",
       " 'muco': 34733,\n",
       " 'wiseguy': 22618,\n",
       " \"richie's\": 27651,\n",
       " 'tonino': 40907,\n",
       " 'unleavened': 52190,\n",
       " 'fry': 11590,\n",
       " \"'tv'\": 40908,\n",
       " 'toning': 40909,\n",
       " 'obese': 14364,\n",
       " 'sensationalized': 30592,\n",
       " 'spiv': 40910,\n",
       " 'spit': 6262,\n",
       " 'arkin': 7367,\n",
       " 'charleton': 21918,\n",
       " 'jeon': 16826,\n",
       " 'boardroom': 21919,\n",
       " 'doubts': 4992,\n",
       " 'spin': 3087,\n",
       " 'hepo': 53086,\n",
       " 'wildcat': 27652,\n",
       " 'venoms': 10587,\n",
       " 'misconstrues': 52194,\n",
       " 'mesmerising': 18517,\n",
       " 'misconstrued': 40911,\n",
       " 'rescinds': 52195,\n",
       " 'prostrate': 52196,\n",
       " 'majid': 40912,\n",
       " 'climbed': 16482,\n",
       " 'canoeing': 34734,\n",
       " 'majin': 52198,\n",
       " 'animie': 57807,\n",
       " 'sylke': 40913,\n",
       " 'conditioned': 14902,\n",
       " 'waddell': 40914,\n",
       " '3\\x85': 52199,\n",
       " 'hyperdrive': 41191,\n",
       " 'conditioner': 34735,\n",
       " 'bricklayer': 53156,\n",
       " 'hong': 2579,\n",
       " 'memoriam': 52201,\n",
       " 'inventively': 30595,\n",
       " \"levant's\": 25252,\n",
       " 'portobello': 20641,\n",
       " 'remand': 52203,\n",
       " 'mummified': 19507,\n",
       " 'honk': 27653,\n",
       " 'spews': 19508,\n",
       " 'visitations': 40915,\n",
       " 'mummifies': 52204,\n",
       " 'cavanaugh': 25253,\n",
       " 'zeon': 23388,\n",
       " \"jungle's\": 40916,\n",
       " 'viertel': 34736,\n",
       " 'frenchmen': 27654,\n",
       " 'torpedoes': 52205,\n",
       " 'schlessinger': 52206,\n",
       " 'torpedoed': 34737,\n",
       " 'blister': 69879,\n",
       " 'cinefest': 52207,\n",
       " 'furlough': 34738,\n",
       " 'mainsequence': 52208,\n",
       " 'mentors': 40917,\n",
       " 'academic': 9097,\n",
       " 'stillness': 20605,\n",
       " 'academia': 40918,\n",
       " 'lonelier': 52209,\n",
       " 'nibby': 52210,\n",
       " \"losers'\": 52211,\n",
       " 'cineastes': 40919,\n",
       " 'corporate': 4452,\n",
       " 'massaging': 40920,\n",
       " 'bellow': 30596,\n",
       " 'absurdities': 19509,\n",
       " 'expetations': 53244,\n",
       " 'nyfiken': 40921,\n",
       " 'mehras': 75641,\n",
       " 'lasse': 52212,\n",
       " 'visability': 52213,\n",
       " 'militarily': 33949,\n",
       " \"elder'\": 52214,\n",
       " 'gainsbourg': 19026,\n",
       " 'hah': 20606,\n",
       " 'hai': 13423,\n",
       " 'haj': 34739,\n",
       " 'hak': 25254,\n",
       " 'hal': 4314,\n",
       " 'ham': 4895,\n",
       " 'duffer': 53262,\n",
       " 'haa': 52216,\n",
       " 'had': 69,\n",
       " 'advancement': 11933,\n",
       " 'hag': 16828,\n",
       " \"hand'\": 25255,\n",
       " 'hay': 13424,\n",
       " 'mcnamara': 20607,\n",
       " \"mozart's\": 52217,\n",
       " 'duffel': 30734,\n",
       " 'haq': 30597,\n",
       " 'har': 13890,\n",
       " 'has': 47,\n",
       " 'hat': 2404,\n",
       " 'hav': 40922,\n",
       " 'haw': 30598,\n",
       " 'figtings': 52218,\n",
       " 'elders': 15498,\n",
       " 'underpanted': 52219,\n",
       " 'pninson': 52220,\n",
       " 'unequivocally': 27655,\n",
       " \"barbara's\": 23676,\n",
       " \"bello'\": 52222,\n",
       " 'indicative': 13000,\n",
       " 'yawnfest': 40923,\n",
       " 'hexploitation': 52223,\n",
       " \"loder's\": 52224,\n",
       " 'sleuthing': 27656,\n",
       " \"justin's\": 32625,\n",
       " \"'ball\": 52225,\n",
       " \"'summer\": 52226,\n",
       " \"'demons'\": 34938,\n",
       " \"mormon's\": 52228,\n",
       " \"laughton's\": 34740,\n",
       " 'debell': 52229,\n",
       " 'shipyard': 39727,\n",
       " 'unabashedly': 30600,\n",
       " 'disks': 40404,\n",
       " 'crowd': 2293,\n",
       " 'crowe': 10090,\n",
       " \"vancouver's\": 56437,\n",
       " 'mosques': 34741,\n",
       " 'crown': 6630,\n",
       " 'culpas': 52230,\n",
       " 'crows': 27657,\n",
       " 'surrell': 53347,\n",
       " 'flowless': 52232,\n",
       " 'sheirk': 52233,\n",
       " \"'three\": 40926,\n",
       " \"peterson'\": 52234,\n",
       " 'ooverall': 52235,\n",
       " 'perchance': 40927,\n",
       " 'bottom': 1324,\n",
       " 'chabert': 53366,\n",
       " 'sneha': 52236,\n",
       " 'inhuman': 13891,\n",
       " 'ichii': 52237,\n",
       " 'ursla': 52238,\n",
       " 'completly': 30601,\n",
       " 'moviedom': 40928,\n",
       " 'raddick': 52239,\n",
       " 'brundage': 51998,\n",
       " 'brigades': 40929,\n",
       " 'starring': 1184,\n",
       " \"'goal'\": 52240,\n",
       " 'caskets': 52241,\n",
       " 'willcock': 52242,\n",
       " \"threesome's\": 52243,\n",
       " \"mosque'\": 52244,\n",
       " \"cover's\": 52245,\n",
       " 'spaceships': 17640,\n",
       " 'anomalous': 40930,\n",
       " 'ptsd': 27658,\n",
       " 'shirdan': 52246,\n",
       " 'obscenity': 21965,\n",
       " 'lemmings': 30602,\n",
       " 'duccio': 30603,\n",
       " \"levene's\": 52247,\n",
       " \"'gorby'\": 52248,\n",
       " \"teenager's\": 25258,\n",
       " 'marshall': 5343,\n",
       " 'honeymoon': 9098,\n",
       " 'shoots': 3234,\n",
       " 'despised': 12261,\n",
       " 'okabasho': 52249,\n",
       " 'fabric': 8292,\n",
       " 'cannavale': 18518,\n",
       " 'raped': 3540,\n",
       " \"tutt's\": 52250,\n",
       " 'grasping': 17641,\n",
       " 'despises': 18519,\n",
       " \"thief's\": 40931,\n",
       " 'rapes': 8929,\n",
       " 'raper': 52251,\n",
       " \"eyre'\": 27659,\n",
       " 'walchek': 52252,\n",
       " \"elmo's\": 23389,\n",
       " 'perfumes': 40932,\n",
       " 'spurting': 21921,\n",
       " \"exposition'\\x85\": 52253,\n",
       " 'denoting': 52254,\n",
       " 'thesaurus': 34743,\n",
       " \"shoot'\": 40933,\n",
       " 'bonejack': 49762,\n",
       " 'simpsonian': 52256,\n",
       " 'hebetude': 30604,\n",
       " \"hallow's\": 34744,\n",
       " 'desperation\\x85': 52257,\n",
       " 'incinerator': 34745,\n",
       " 'congratulations': 10311,\n",
       " 'humbled': 52258,\n",
       " \"else's\": 5927,\n",
       " 'trelkovski': 40848,\n",
       " \"rape'\": 52259,\n",
       " \"'chapters'\": 59389,\n",
       " '1600s': 52260,\n",
       " 'martian': 7256,\n",
       " 'nicest': 25259,\n",
       " 'eyred': 52262,\n",
       " 'passenger': 9460,\n",
       " 'disgrace': 6044,\n",
       " 'moderne': 52263,\n",
       " 'barrymore': 5123,\n",
       " 'yankovich': 52264,\n",
       " 'moderns': 40934,\n",
       " 'studliest': 52265,\n",
       " 'bedsheet': 52266,\n",
       " 'decapitation': 14903,\n",
       " 'slurring': 52267,\n",
       " \"'nunsploitation'\": 52268,\n",
       " \"'character'\": 34746,\n",
       " 'cambodia': 9883,\n",
       " 'rebelious': 52269,\n",
       " 'pasadena': 27660,\n",
       " 'crowne': 40935,\n",
       " \"'bedchamber\": 52270,\n",
       " 'conjectural': 52271,\n",
       " 'appologize': 52272,\n",
       " 'halfassing': 52273,\n",
       " 'paycheque': 57819,\n",
       " 'palms': 20609,\n",
       " \"'islands\": 52274,\n",
       " 'hawked': 40936,\n",
       " 'palme': 21922,\n",
       " 'conservatively': 40937,\n",
       " 'larp': 64010,\n",
       " 'palma': 5561,\n",
       " 'smelling': 21923,\n",
       " 'aragorn': 13001,\n",
       " 'hawker': 52275,\n",
       " 'hawkes': 52276,\n",
       " 'explosions': 3978,\n",
       " 'loren': 8062,\n",
       " \"pyle's\": 52277,\n",
       " 'shootout': 6707,\n",
       " \"mike's\": 18520,\n",
       " \"driscoll's\": 52278,\n",
       " 'cogsworth': 40938,\n",
       " \"britian's\": 52279,\n",
       " 'childs': 34747,\n",
       " \"portrait's\": 52280,\n",
       " 'chain': 3629,\n",
       " 'whoever': 2500,\n",
       " 'puttered': 52281,\n",
       " 'childe': 52282,\n",
       " 'maywether': 52283,\n",
       " 'chair': 3039,\n",
       " \"rance's\": 52284,\n",
       " 'machu': 34748,\n",
       " 'ballet': 4520,\n",
       " 'grapples': 34749,\n",
       " 'summerize': 76155,\n",
       " 'freelance': 30606,\n",
       " \"andrea's\": 52286,\n",
       " '\\x91very': 52287,\n",
       " 'coolidge': 45882,\n",
       " 'mache': 18521,\n",
       " 'balled': 52288,\n",
       " 'grappled': 40940,\n",
       " 'macha': 18522,\n",
       " 'underlining': 21924,\n",
       " 'macho': 5626,\n",
       " 'oversight': 19510,\n",
       " 'machi': 25260,\n",
       " 'verbally': 11314,\n",
       " 'tenacious': 21925,\n",
       " 'windshields': 40941,\n",
       " 'paychecks': 18560,\n",
       " 'jerk': 3399,\n",
       " \"good'\": 11934,\n",
       " 'prancer': 34751,\n",
       " 'prances': 21926,\n",
       " 'olympus': 52289,\n",
       " 'lark': 21927,\n",
       " 'embark': 10788,\n",
       " 'gloomy': 7368,\n",
       " 'jehaan': 52290,\n",
       " 'turaqui': 52291,\n",
       " \"child'\": 20610,\n",
       " 'locked': 2897,\n",
       " 'pranced': 52292,\n",
       " 'exact': 2591,\n",
       " 'unattuned': 52293,\n",
       " 'minute': 786,\n",
       " 'skewed': 16121,\n",
       " 'hodgins': 40943,\n",
       " 'skewer': 34752,\n",
       " 'think\\x85': 52294,\n",
       " 'rosenstein': 38768,\n",
       " 'helmit': 52295,\n",
       " 'wrestlemanias': 34753,\n",
       " 'hindered': 16829,\n",
       " \"martha's\": 30607,\n",
       " 'cheree': 52296,\n",
       " \"pluckin'\": 52297,\n",
       " 'ogles': 40944,\n",
       " 'heavyweight': 11935,\n",
       " 'aada': 82193,\n",
       " 'chopping': 11315,\n",
       " 'strongboy': 61537,\n",
       " 'hegemonic': 41345,\n",
       " 'adorns': 40945,\n",
       " 'xxth': 41349,\n",
       " 'nobuhiro': 34754,\n",
       " 'capitães': 52301,\n",
       " 'kavogianni': 52302,\n",
       " 'antwerp': 13425,\n",
       " 'celebrated': 6541,\n",
       " 'roarke': 52303,\n",
       " 'baggins': 40946,\n",
       " 'cheeseburgers': 31273,\n",
       " 'matras': 52304,\n",
       " \"nineties'\": 52305,\n",
       " \"'craig'\": 52306,\n",
       " 'celebrates': 13002,\n",
       " 'unintentionally': 3386,\n",
       " 'drafted': 14365,\n",
       " 'climby': 52307,\n",
       " '303': 52308,\n",
       " 'oldies': 18523,\n",
       " 'climbs': 9099,\n",
       " 'honour': 9658,\n",
       " 'plucking': 34755,\n",
       " '305': 30077,\n",
       " 'address': 5517,\n",
       " 'menjou': 40947,\n",
       " \"'freak'\": 42595,\n",
       " 'dwindling': 19511,\n",
       " 'benson': 9461,\n",
       " 'white’s': 52310,\n",
       " 'shamelessness': 40948,\n",
       " 'impacted': 21928,\n",
       " 'upatz': 52311,\n",
       " 'cusack': 3843,\n",
       " \"flavia's\": 37570,\n",
       " 'effette': 52312,\n",
       " 'influx': 34756,\n",
       " 'boooooooo': 52313,\n",
       " 'dimitrova': 52314,\n",
       " 'houseman': 13426,\n",
       " 'bigas': 25262,\n",
       " 'boylen': 52315,\n",
       " 'phillipenes': 52316,\n",
       " 'fakery': 40949,\n",
       " \"grandpa's\": 27661,\n",
       " 'darnell': 27662,\n",
       " 'undergone': 19512,\n",
       " 'handbags': 52318,\n",
       " 'perished': 21929,\n",
       " 'pooped': 37781,\n",
       " 'vigour': 27663,\n",
       " 'opposed': 3630,\n",
       " 'etude': 52319,\n",
       " \"caine's\": 11802,\n",
       " 'doozers': 52320,\n",
       " 'photojournals': 34757,\n",
       " 'perishes': 52321,\n",
       " 'constrains': 34758,\n",
       " 'migenes': 40951,\n",
       " 'consoled': 30608,\n",
       " 'alastair': 16830,\n",
       " 'wvs': 52322,\n",
       " 'ooooooh': 52323,\n",
       " 'approving': 34759,\n",
       " 'consoles': 40952,\n",
       " 'disparagement': 52067,\n",
       " 'futureistic': 52325,\n",
       " 'rebounding': 52326,\n",
       " \"'date\": 52327,\n",
       " 'gregoire': 52328,\n",
       " 'rutherford': 21930,\n",
       " 'americanised': 34760,\n",
       " 'novikov': 82199,\n",
       " 'following': 1045,\n",
       " 'munroe': 34761,\n",
       " \"morita'\": 52329,\n",
       " 'christenssen': 52330,\n",
       " 'oatmeal': 23109,\n",
       " 'fossey': 25263,\n",
       " 'livered': 40953,\n",
       " 'listens': 13003,\n",
       " \"'marci\": 76167,\n",
       " \"otis's\": 52333,\n",
       " 'thanking': 23390,\n",
       " 'maude': 16022,\n",
       " 'extensions': 34762,\n",
       " 'ameteurish': 52335,\n",
       " \"commender's\": 52336,\n",
       " 'agricultural': 27664,\n",
       " 'convincingly': 4521,\n",
       " 'fueled': 17642,\n",
       " 'mahattan': 54017,\n",
       " \"paris's\": 40955,\n",
       " 'vulkan': 52339,\n",
       " 'stapes': 52340,\n",
       " 'odysessy': 52341,\n",
       " 'harmon': 12262,\n",
       " 'surfing': 4255,\n",
       " 'halloran': 23497,\n",
       " 'unbelieveably': 49583,\n",
       " \"'offed'\": 52342,\n",
       " 'quadrant': 30610,\n",
       " 'inhabiting': 19513,\n",
       " 'nebbish': 34763,\n",
       " 'forebears': 40956,\n",
       " 'skirmish': 34764,\n",
       " 'ocassionally': 52343,\n",
       " \"'resist\": 52344,\n",
       " 'impactful': 21931,\n",
       " 'spicier': 52345,\n",
       " 'touristy': 40957,\n",
       " \"'football'\": 52346,\n",
       " 'webpage': 40958,\n",
       " 'exurbia': 52348,\n",
       " 'jucier': 52349,\n",
       " 'professors': 14904,\n",
       " 'structuring': 34765,\n",
       " 'jig': 30611,\n",
       " 'overlord': 40959,\n",
       " 'disconnect': 25264,\n",
       " 'sniffle': 82204,\n",
       " 'slimeball': 40960,\n",
       " 'jia': 40961,\n",
       " 'milked': 16831,\n",
       " 'banjoes': 40962,\n",
       " 'jim': 1240,\n",
       " 'workforces': 52351,\n",
       " 'jip': 52352,\n",
       " 'rotweiller': 52353,\n",
       " 'mundaneness': 34766,\n",
       " \"'ninja'\": 52354,\n",
       " \"dead'\": 11043,\n",
       " \"cipriani's\": 40963,\n",
       " 'modestly': 20611,\n",
       " \"professor'\": 52355,\n",
       " 'shacked': 40964,\n",
       " 'bashful': 34767,\n",
       " 'sorter': 23391,\n",
       " 'overpowering': 16123,\n",
       " 'workmanlike': 18524,\n",
       " 'henpecked': 27665,\n",
       " 'sorted': 18525,\n",
       " \"jōb's\": 52357,\n",
       " \"'always\": 52358,\n",
       " \"'baptists\": 34768,\n",
       " 'dreamcatchers': 52359,\n",
       " \"'silence'\": 52360,\n",
       " 'hickory': 21932,\n",
       " 'fun\\x97yet': 52361,\n",
       " 'breakumentary': 52362,\n",
       " 'didn': 15499,\n",
       " 'didi': 52363,\n",
       " 'pealing': 52364,\n",
       " 'dispite': 40965,\n",
       " \"italy's\": 25265,\n",
       " 'instability': 21933,\n",
       " 'quarter': 6542,\n",
       " 'quartet': 12611,\n",
       " 'padmé': 52365,\n",
       " \"'bleedmedry\": 52366,\n",
       " 'pahalniuk': 52367,\n",
       " 'honduras': 52368,\n",
       " 'bursting': 10789,\n",
       " \"pablo's\": 41468,\n",
       " 'irremediably': 52370,\n",
       " 'presages': 40966,\n",
       " 'bowlegged': 57835,\n",
       " 'dalip': 65186,\n",
       " 'entering': 6263,\n",
       " 'newsradio': 76175,\n",
       " 'presaged': 54153,\n",
       " \"giallo's\": 27666,\n",
       " 'bouyant': 40967,\n",
       " 'amerterish': 52371,\n",
       " 'rajni': 18526,\n",
       " 'leeves': 30613,\n",
       " 'macauley': 34770,\n",
       " 'seriously': 615,\n",
       " 'sugercoma': 52372,\n",
       " 'grimstead': 52373,\n",
       " \"'fairy'\": 52374,\n",
       " 'zenda': 30614,\n",
       " \"'twins'\": 52375,\n",
       " 'realisation': 17643,\n",
       " 'highsmith': 27667,\n",
       " 'raunchy': 7820,\n",
       " 'incentives': 40968,\n",
       " 'flatson': 52377,\n",
       " 'snooker': 35100,\n",
       " 'crazies': 16832,\n",
       " 'crazier': 14905,\n",
       " 'grandma': 7097,\n",
       " 'napunsaktha': 52378,\n",
       " 'workmanship': 30615,\n",
       " 'reisner': 52379,\n",
       " \"sanford's\": 61309,\n",
       " '\\x91doña': 52380,\n",
       " 'modest': 6111,\n",
       " \"everything's\": 19156,\n",
       " 'hamer': 40969,\n",
       " \"couldn't'\": 52382,\n",
       " 'quibble': 13004,\n",
       " 'socking': 52383,\n",
       " 'tingler': 21934,\n",
       " 'gutman': 52384,\n",
       " 'lachlan': 40970,\n",
       " 'tableaus': 52385,\n",
       " 'headbanger': 52386,\n",
       " 'spoken': 2850,\n",
       " 'cerebrally': 34771,\n",
       " \"'road\": 23493,\n",
       " 'tableaux': 21935,\n",
       " \"proust's\": 40971,\n",
       " 'periodical': 40972,\n",
       " \"shoveller's\": 52388,\n",
       " 'tamara': 25266,\n",
       " 'affords': 17644,\n",
       " 'concert': 3252,\n",
       " \"yara's\": 87958,\n",
       " 'someome': 52389,\n",
       " 'lingering': 8427,\n",
       " \"abraham's\": 41514,\n",
       " 'beesley': 34772,\n",
       " 'cherbourg': 34773,\n",
       " 'kagan': 28627,\n",
       " 'snatch': 9100,\n",
       " \"miyazaki's\": 9263,\n",
       " 'absorbs': 25267,\n",
       " \"koltai's\": 40973,\n",
       " 'tingled': 64030,\n",
       " 'crossroads': 19514,\n",
       " 'rehab': 16124,\n",
       " 'falworth': 52392,\n",
       " 'sequals': 52393,\n",
       " ...}"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "word_index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "index_word = {v:k for k,v in word_index.items()}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[2,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 530,\n",
       " 973,\n",
       " 1622,\n",
       " 1385,\n",
       " 65,\n",
       " 458,\n",
       " 4468,\n",
       " 66,\n",
       " 3941,\n",
       " 2,\n",
       " 173,\n",
       " 2,\n",
       " 256,\n",
       " 2,\n",
       " 2,\n",
       " 100,\n",
       " 2,\n",
       " 838,\n",
       " 112,\n",
       " 50,\n",
       " 670,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 480,\n",
       " 284,\n",
       " 2,\n",
       " 150,\n",
       " 2,\n",
       " 172,\n",
       " 112,\n",
       " 167,\n",
       " 2,\n",
       " 336,\n",
       " 385,\n",
       " 2,\n",
       " 2,\n",
       " 172,\n",
       " 4536,\n",
       " 1111,\n",
       " 2,\n",
       " 546,\n",
       " 2,\n",
       " 2,\n",
       " 447,\n",
       " 2,\n",
       " 192,\n",
       " 50,\n",
       " 2,\n",
       " 2,\n",
       " 147,\n",
       " 2025,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 1920,\n",
       " 4613,\n",
       " 469,\n",
       " 2,\n",
       " 2,\n",
       " 71,\n",
       " 87,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 530,\n",
       " 2,\n",
       " 76,\n",
       " 2,\n",
       " 2,\n",
       " 1247,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 515,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 626,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 62,\n",
       " 386,\n",
       " 2,\n",
       " 2,\n",
       " 316,\n",
       " 2,\n",
       " 106,\n",
       " 2,\n",
       " 2,\n",
       " 2223,\n",
       " 2,\n",
       " 2,\n",
       " 480,\n",
       " 66,\n",
       " 3785,\n",
       " 2,\n",
       " 2,\n",
       " 130,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 619,\n",
       " 2,\n",
       " 2,\n",
       " 124,\n",
       " 51,\n",
       " 2,\n",
       " 135,\n",
       " 2,\n",
       " 2,\n",
       " 1415,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 215,\n",
       " 2,\n",
       " 77,\n",
       " 52,\n",
       " 2,\n",
       " 2,\n",
       " 407,\n",
       " 2,\n",
       " 82,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 107,\n",
       " 117,\n",
       " 2,\n",
       " 2,\n",
       " 256,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 3766,\n",
       " 2,\n",
       " 723,\n",
       " 2,\n",
       " 71,\n",
       " 2,\n",
       " 530,\n",
       " 476,\n",
       " 2,\n",
       " 400,\n",
       " 317,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 1029,\n",
       " 2,\n",
       " 104,\n",
       " 88,\n",
       " 2,\n",
       " 381,\n",
       " 2,\n",
       " 297,\n",
       " 98,\n",
       " 2,\n",
       " 2071,\n",
       " 56,\n",
       " 2,\n",
       " 141,\n",
       " 2,\n",
       " 194,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 226,\n",
       " 2,\n",
       " 2,\n",
       " 134,\n",
       " 476,\n",
       " 2,\n",
       " 480,\n",
       " 2,\n",
       " 144,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 51,\n",
       " 2,\n",
       " 2,\n",
       " 224,\n",
       " 92,\n",
       " 2,\n",
       " 104,\n",
       " 2,\n",
       " 226,\n",
       " 65,\n",
       " 2,\n",
       " 2,\n",
       " 1334,\n",
       " 88,\n",
       " 2,\n",
       " 2,\n",
       " 283,\n",
       " 2,\n",
       " 2,\n",
       " 4472,\n",
       " 113,\n",
       " 103,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 178,\n",
       " 2]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x_train[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"UNK UNK UNK UNK UNK brilliant casting location scenery story direction everyone's really suited UNK part UNK played UNK UNK could UNK imagine being there robert UNK UNK UNK amazing actor UNK now UNK same being director UNK father came UNK UNK same scottish island UNK myself UNK UNK loved UNK fact there UNK UNK real connection UNK UNK UNK UNK witty remarks throughout UNK UNK were great UNK UNK UNK brilliant UNK much UNK UNK bought UNK UNK UNK soon UNK UNK UNK released UNK UNK UNK would recommend UNK UNK everyone UNK watch UNK UNK fly UNK UNK amazing really cried UNK UNK end UNK UNK UNK sad UNK UNK know what UNK say UNK UNK cry UNK UNK UNK UNK must UNK been good UNK UNK definitely UNK also UNK UNK UNK two little UNK UNK played UNK UNK UNK norman UNK paul UNK were UNK brilliant children UNK often left UNK UNK UNK UNK list UNK think because UNK stars UNK play them UNK grown up UNK such UNK big UNK UNK UNK whole UNK UNK these children UNK amazing UNK should UNK UNK UNK what UNK UNK done don't UNK think UNK whole story UNK UNK lovely because UNK UNK true UNK UNK someone's life after UNK UNK UNK UNK UNK us UNK\""
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "' '.join(index_word[id] for id in x_train[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "(all_x_train,_),(all_x_valid,_) = imdb.load_data() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"START this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert redford's is an amazing actor and now the same being director norman's father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for retail and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also congratulations to the two little boy's that played the part's of norman and paul they were just brilliant children are often left out of the praising list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have done don't you think the whole story was so lovely because it was true and was someone's life after all that was shared with us all\""
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "' '.join(index_word[id] for id in all_x_train[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Preprocess data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "x_train = pad_sequences(x_train, maxlen=max_review_length, padding=pad_type, truncating=trunc_type, value=0)\n",
    "x_valid = pad_sequences(x_valid, maxlen=max_review_length, padding=pad_type, truncating=trunc_type, value=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1415,    2,    2,    2,    2,  215,    2,   77,   52,    2,    2,\n",
       "         407,    2,   82,    2,    2,    2,  107,  117,    2,    2,  256,\n",
       "           2,    2,    2, 3766,    2,  723,    2,   71,    2,  530,  476,\n",
       "           2,  400,  317,    2,    2,    2,    2, 1029,    2,  104,   88,\n",
       "           2,  381,    2,  297,   98,    2, 2071,   56,    2,  141,    2,\n",
       "         194,    2,    2,    2,  226,    2,    2,  134,  476,    2,  480,\n",
       "           2,  144,    2,    2,    2,   51,    2,    2,  224,   92,    2,\n",
       "         104,    2,  226,   65,    2,    2, 1334,   88,    2,    2,  283,\n",
       "           2,    2, 4472,  113,  103,    2,    2,    2,    2,    2,  178,\n",
       "           2],\n",
       "       [ 163,    2, 3215,    2,    2, 1153,    2,  194,  775,    2,    2,\n",
       "           2,  349, 2637,  148,  605,    2,    2,    2,  123,  125,   68,\n",
       "           2,    2,    2,  349,  165, 4362,   98,    2,    2,  228,    2,\n",
       "           2,    2, 1157,    2,  299,  120,    2,  120,  174,    2,  220,\n",
       "         175,  136,   50,    2, 4373,  228,    2,    2,    2,  656,  245,\n",
       "        2350,    2,    2,    2,  131,  152,  491,    2,    2,    2,    2,\n",
       "        1212,    2,    2,    2,  371,   78,    2,  625,   64, 1382,    2,\n",
       "           2,  168,  145,    2,    2, 1690,    2,    2,    2, 1355,    2,\n",
       "           2,    2,   52,  154,  462,    2,   89,   78,  285,    2,  145,\n",
       "          95],\n",
       "       [1301,    2, 1873,    2,   89,   78,    2,   66,    2,    2,  360,\n",
       "           2,    2,   58,  316,  334,    2,    2, 1716,    2,  645,  662,\n",
       "           2,  257,   85, 1200,    2, 1228, 2578,   83,   68, 3912,    2,\n",
       "           2,  165, 1539,  278,    2,   69,    2,  780,    2,  106,    2,\n",
       "           2, 1338,    2,    2,    2,    2,  215,    2,  610,    2,    2,\n",
       "          87,  326,    2, 2300,    2,    2,    2,    2,  272,    2,   57,\n",
       "           2,    2,    2,    2,    2,    2, 2307,   51,    2,  170,    2,\n",
       "         595,  116,  595, 1352,    2,  191,   79,  638,   89,    2,    2,\n",
       "           2,    2,  106,  607,  624,    2,  534,    2,  227,    2,  129,\n",
       "         113],\n",
       "       [   2,    2,    2,  188, 1076, 3222,    2,    2,    2,    2, 2348,\n",
       "         537,    2,   53,  537,    2,   82,    2,    2,    2,    2,    2,\n",
       "         280,    2,  219,    2,    2,  431,  758,  859,    2,  953, 1052,\n",
       "           2,    2,    2,    2,   94,    2,    2,  238,   60,    2,    2,\n",
       "           2,  804,    2,    2,    2,    2,  132,    2,   67,    2,    2,\n",
       "           2,    2,  283,    2,    2,    2,    2,    2,  242,  955,    2,\n",
       "           2,  279,    2,    2,    2, 1685,  195,    2,  238,   60,  796,\n",
       "           2,    2,  671,    2, 2804,    2,    2,  559,  154,  888,    2,\n",
       "         726,   50,    2,    2,    2,    2,  566,    2,  579,    2,   64,\n",
       "        2574],\n",
       "       [   2,    2,  131, 2073,  249,  114,  249,  229,  249,    2,    2,\n",
       "           2,  126,  110,    2,  473,    2,  569,   61,  419,   56,  429,\n",
       "           2, 1513,    2,    2,  534,   95,  474,  570,    2,    2,  124,\n",
       "         138,   88,    2,  421, 1543,   52,  725,    2,   61,  419,    2,\n",
       "           2, 1571,    2, 1543,    2,    2,    2,    2,    2,  296,    2,\n",
       "        3524,    2,    2,  421,  128,   74,  233,  334,  207,  126,  224,\n",
       "           2,  562,  298, 2167, 1272,    2, 2601,    2,  516,  988,    2,\n",
       "           2,   79,  120,    2,  595,    2,  784,    2, 3171,    2,  165,\n",
       "         170,  143,    2,    2,    2,    2,    2,  226,  251,    2,   61,\n",
       "         113],\n",
       "       [   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,\n",
       "           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,\n",
       "           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,\n",
       "           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,\n",
       "           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,\n",
       "           0,    0,    2,  778,  128,   74,    2,  630,  163,    2,    2,\n",
       "        1766,    2, 1051,    2,    2,   85,  156,    2,    2,  148,  139,\n",
       "         121,  664,  665,    2,    2, 1361,  173,    2,  749,    2,    2,\n",
       "        3804,    2,    2,  226,   65,    2,    2,  127,    2,    2,    2,\n",
       "           2]], dtype=int32)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x_train[0:6]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100\n",
      "100\n",
      "100\n",
      "100\n",
      "100\n",
      "100\n"
     ]
    }
   ],
   "source": [
    "for x in x_train[0:6]:\n",
    "    print(len(x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"cry UNK UNK UNK UNK must UNK been good UNK UNK definitely UNK also UNK UNK UNK two little UNK UNK played UNK UNK UNK norman UNK paul UNK were UNK brilliant children UNK often left UNK UNK UNK UNK list UNK think because UNK stars UNK play them UNK grown up UNK such UNK big UNK UNK UNK whole UNK UNK these children UNK amazing UNK should UNK UNK UNK what UNK UNK done don't UNK think UNK whole story UNK UNK lovely because UNK UNK true UNK UNK someone's life after UNK UNK UNK UNK UNK us UNK\""
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "' '.join(index_word[id] for id in x_train[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD UNK begins better than UNK ends funny UNK UNK russian UNK crew UNK UNK other actors UNK UNK those scenes where documentary shots UNK UNK spoiler part UNK message UNK UNK contrary UNK UNK whole story UNK UNK does UNK UNK UNK UNK'"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "' '.join(index_word[id] for id in x_train[5])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "#### Design neural network architecture"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "model = Sequential()\n",
    "model.add(Embedding(n_unique_words, n_dim, input_length=max_review_length))\n",
    "model.add(Flatten())\n",
    "model.add(Dense(n_dense, activation='relu'))\n",
    "model.add(Dropout(dropout))\n",
    "# model.add(Dense(n_dense, activation='relu'))\n",
    "# model.add(Dropout(dropout))\n",
    "model.add(Dense(1, activation='sigmoid')) # mathematically equivalent to softmax with two classes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "embedding_1 (Embedding)      (None, 100, 64)           320000    \n",
      "_________________________________________________________________\n",
      "flatten_1 (Flatten)          (None, 6400)              0         \n",
      "_________________________________________________________________\n",
      "dense_1 (Dense)              (None, 64)                409664    \n",
      "_________________________________________________________________\n",
      "dropout_1 (Dropout)          (None, 64)                0         \n",
      "_________________________________________________________________\n",
      "dense_2 (Dense)              (None, 1)                 65        \n",
      "=================================================================\n",
      "Total params: 729,729\n",
      "Trainable params: 729,729\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "model.summary() # so many parameters!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(64, 5000, 320000)"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# embedding layer dimensions and parameters: \n",
    "n_dim, n_unique_words, n_dim*n_unique_words"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(100, 64, 6400)"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# ...flatten:\n",
    "max_review_length, n_dim, n_dim*max_review_length"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(64, 409664)"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# ...dense:\n",
    "n_dense, n_dim*max_review_length*n_dense + n_dense # weights + biases"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "65"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# ...and output:\n",
    "n_dense + 1 "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Configure model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "modelcheckpoint = ModelCheckpoint(filepath=output_dir+\"/weights.{epoch:02d}.hdf5\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "if not os.path.exists(output_dir):\n",
    "    os.makedirs(output_dir)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Train!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 25000 samples, validate on 25000 samples\n",
      "Epoch 1/4\n",
      "25000/25000 [==============================] - 1s - loss: 0.5402 - acc: 0.7095 - val_loss: 0.3701 - val_acc: 0.8367\n",
      "Epoch 2/4\n",
      "25000/25000 [==============================] - 0s - loss: 0.2825 - acc: 0.8881 - val_loss: 0.3459 - val_acc: 0.8470\n",
      "Epoch 3/4\n",
      "25000/25000 [==============================] - 1s - loss: 0.1279 - acc: 0.9616 - val_loss: 0.4151 - val_acc: 0.8333\n",
      "Epoch 4/4\n",
      "25000/25000 [==============================] - 0s - loss: 0.0303 - acc: 0.9941 - val_loss: 0.5222 - val_acc: 0.8322\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7fc3b1192ef0>"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 84.7% validation accuracy in epoch 2\n",
    "model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_valid, y_valid), callbacks=[modelcheckpoint])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Evaluate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "model.load_weights(output_dir+\"/weights.01.hdf5\") # zero-indexed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "23552/25000 [===========================>..] - ETA: 0s"
     ]
    }
   ],
   "source": [
    "y_hat = model.predict_proba(x_valid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "25000"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(y_hat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.88548595], dtype=float32)"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_hat[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD8CAYAAAB+UHOxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFThJREFUeJzt3X+QXeV93/H3x8jYcWJb/FgYRpIrPFZcY88Y0x3A9Uzq\nWK4Q2IP4AzLyNEVmNFUnJWmSZtpC2xm1YDq4v0iYiUnVoEZ4EoNM46KxaagqYNx2CmYxhBgIozUQ\ntJWKNpaQmzImEfn2j/sIL2JXe1bavev1eb9mdu453/Oce54HCX32POfce1JVSJL6522L3QFJ0uIw\nACSppwwASeopA0CSesoAkKSeMgAkqacMAEnqKQNAknrKAJCknlq22B04kbPPPrtWr1692N2Q3ur7\nzw1e3/PBxe2HNI3HH3/8T6tqZLZ2P9IBsHr1asbGxha7G9Jb/bdPDl4//fBi9kKaVpI/6dLOKSBJ\n6ikDQJJ6ygCQpJ4yACSppwwASeqpTgGQ5FeTPJ3kO0m+kuSdSc5P8miSvUnuSXJ6a/uOtj7etq+e\n8j43tvpzSS5bmCFJkrqYNQCSrAD+PjBaVR8BTgM2Al8EbquqNcBhYHPbZTNwuKo+ANzW2pHkgrbf\nh4H1wJeSnDa/w5EkddV1CmgZ8BNJlgHvAg4AnwLubdt3AFe15Q1tnbZ9bZK0+t1V9VpVvQCMAxef\n+hAkSSdj1gCoqv8N/BvgJQb/8B8BHgdeqaqjrdkEsKItrwD2tX2PtvZnTa1Ps88bkmxJMpZkbHJy\n8mTGJEnqYNZPAic5g8Fv7+cDrwBfBS6fpumxp8tnhm0z1d9cqNoGbAMYHR09pSfWr77hG6ey+0l7\n8dbPLMpxJWkuukwBfRp4oaomq+ovgN8H/jqwvE0JAawE9rflCWAVQNv+XuDQ1Po0+0iShqxLALwE\nXJrkXW0ufy3wDPAQcHVrswm4ry3vauu07Q9WVbX6xnaX0PnAGuBb8zMMSdJczToFVFWPJrkX+DZw\nFHiCwRTNN4C7k3yh1e5su9wJfDnJOIPf/De293k6yU4G4XEUuL6qXp/n8UiSOur0baBVtRXYelz5\neaa5i6eqfgBcM8P73ALcMsc+SpIWgJ8ElqSeMgAkqacMAEnqKQNAknrKAJCknjIAJKmnDABJ6ikD\nQJJ6ygCQpJ4yACSppwwASeopA0CSesoAkKSeMgAkqacMAEnqKQNAknrKAJCknpo1AJJ8MMmTU36+\nn+RXkpyZZHeSve31jNY+SW5PMp7kqSQXTXmvTa393iSbZj6qJGmhzRoAVfVcVV1YVRcCfw14Ffga\ncAOwp6rWAHvaOsDlDB74vgbYAtwBkORMBo+VvITBoyS3HgsNSdLwzXUKaC3w3ar6E2ADsKPVdwBX\nteUNwF018AiwPMl5wGXA7qo6VFWHgd3A+lMegSTppMw1ADYCX2nL51bVAYD2ek6rrwD2TdlnotVm\nqr9Jki1JxpKMTU5OzrF7kqSuOgdAktOBK4GvztZ0mlqdoP7mQtW2qhqtqtGRkZGu3ZMkzdFczgAu\nB75dVS+39Zfb1A7t9WCrTwCrpuy3Eth/grokaRHMJQA+xw+nfwB2Acfu5NkE3Delfm27G+hS4Eib\nInoAWJfkjHbxd12rSZIWwbIujZK8C/ibwN+dUr4V2JlkM/AScE2r3w9cAYwzuGPoOoCqOpTkZuCx\n1u6mqjp0yiOQJJ2UTgFQVa8CZx1X+x6Du4KOb1vA9TO8z3Zg+9y7KUmab34SWJJ6qtMZgCT10eob\nvrFox37x1s8s+DE8A5CknjIAJKmnDABJ6ikDQJJ6ygCQpJ4yACSppwwASeopA0CSesoAkKSeMgAk\nqacMAEnqKQNAknrKAJCknjIAJKmnDABJ6qlOAZBkeZJ7k/xxkmeTfDzJmUl2J9nbXs9obZPk9iTj\nSZ5KctGU99nU2u9NsmnmI0qSFlrXM4DfAP6gqv4q8FHgWeAGYE9VrQH2tHWAy4E17WcLcAdAkjOB\nrcAlwMXA1mOhIUkavlkDIMl7gJ8B7gSoqj+vqleADcCO1mwHcFVb3gDcVQOPAMuTnAdcBuyuqkNV\ndRjYDayf19FIkjrrcgbwfmAS+I9Jnkjy20l+Eji3qg4AtNdzWvsVwL4p+0+02kz1N0myJclYkrHJ\nyck5D0iS1E2XAFgGXATcUVUfA/4fP5zumU6mqdUJ6m8uVG2rqtGqGh0ZGenQPUnSyegSABPARFU9\n2tbvZRAIL7epHdrrwSntV03ZfyWw/wR1SdIimDUAqur/APuSfLCV1gLPALuAY3fybALua8u7gGvb\n3UCXAkfaFNEDwLokZ7SLv+taTZK0CJZ1bPdLwO8mOR14HriOQXjsTLIZeAm4prW9H7gCGAdebW2p\nqkNJbgYea+1uqqpD8zIKSdKcdQqAqnoSGJ1m09pp2hZw/Qzvsx3YPpcOSpIWhp8ElqSeMgAkqacM\nAEnqKQNAknrKAJCknjIAJKmnDABJ6ikDQJJ6ygCQpJ4yACSppwwASeopA0CSesoAkKSeMgAkqacM\nAEnqKQNAknqqUwAkeTHJHyV5MslYq52ZZHeSve31jFZPktuTjCd5KslFU95nU2u/N8mmmY4nSVp4\nczkD+NmqurCqjj0Z7AZgT1WtAfa0dYDLgTXtZwtwBwwCA9gKXAJcDGw9FhqSpOE7lSmgDcCOtrwD\nuGpK/a4aeARYnuQ84DJgd1UdqqrDwG5g/SkcX5J0CroGQAH/NcnjSba02rlVdQCgvZ7T6iuAfVP2\nnWi1meqSpEXQ6aHwwCeqan+Sc4DdSf74BG0zTa1OUH/zzoOA2QLwvve9r2P3JElz1ekMoKr2t9eD\nwNcYzOG/3KZ2aK8HW/MJYNWU3VcC+09QP/5Y26pqtKpGR0ZG5jYaSVJnswZAkp9M8u5jy8A64DvA\nLuDYnTybgPva8i7g2nY30KXAkTZF9ACwLskZ7eLvulaTJC2CLlNA5wJfS3Ks/e9V1R8keQzYmWQz\n8BJwTWt/P3AFMA68ClwHUFWHktwMPNba3VRVh+ZtJJKkOZk1AKrqeeCj09S/B6ydpl7A9TO813Zg\n+9y7KUmab34SWJJ6ygCQpJ4yACSppwwASeopA0CSesoAkKSeMgAkqacMAEnqKQNAknrKAJCknjIA\nJKmnDABJ6ikDQJJ6ygCQpJ4yACSppwwASeopA0CSeqpzACQ5LckTSb7e1s9P8miSvUnuSXJ6q7+j\nrY+37aunvMeNrf5cksvmezCSpO7mcgbwy8CzU9a/CNxWVWuAw8DmVt8MHK6qDwC3tXYkuQDYCHwY\nWA98Kclpp9Z9SdLJ6hQASVYCnwF+u60H+BRwb2uyA7iqLW9o67Tta1v7DcDdVfVaVb3A4KHxF8/H\nICRJc9f1DODXgX8E/GVbPwt4paqOtvUJYEVbXgHsA2jbj7T2b9Sn2UeSNGSzBkCSzwIHq+rxqeVp\nmtYs2060z9TjbUkylmRscnJytu5Jkk5SlzOATwBXJnkRuJvB1M+vA8uTLGttVgL72/IEsAqgbX8v\ncGhqfZp93lBV26pqtKpGR0ZG5jwgSVI3swZAVd1YVSurajWDi7gPVtXfAh4Crm7NNgH3teVdbZ22\n/cGqqlbf2O4SOh9YA3xr3kYiSZqTZbM3mdE/Bu5O8gXgCeDOVr8T+HKScQa/+W8EqKqnk+wEngGO\nAtdX1euncHxJ0imYUwBU1cPAw235eaa5i6eqfgBcM8P+twC3zLWTkqT55yeBJamnDABJ6ikDQJJ6\nygCQpJ4yACSppwwASeopA0CSesoAkKSeMgAkqacMAEnqKQNAknrKAJCknjIAJKmnDABJ6ikDQJJ6\nygCQpJ4yACSpp2YNgCTvTPKtJH+Y5Okk/6LVz0/yaJK9Se5Jcnqrv6Otj7ftq6e8142t/lySyxZq\nUJKk2XU5A3gN+FRVfRS4EFif5FLgi8BtVbUGOAxsbu03A4er6gPAba0dSS5g8HzgDwPrgS8lOW0+\nByNJ6m7WAKiBP2urb28/BXwKuLfVdwBXteUNbZ22fW2StPrdVfVaVb0AjDPNM4UlScPR6RpAktOS\nPAkcBHYD3wVeqaqjrckEsKItrwD2AbTtR4Czptan2UeSNGSdAqCqXq+qC4GVDH5r/9B0zdprZtg2\nU/1NkmxJMpZkbHJyskv3JEknYU53AVXVK8DDwKXA8iTL2qaVwP62PAGsAmjb3wscmlqfZp+px9hW\nVaNVNToyMjKX7kmS5qDLXUAjSZa35Z8APg08CzwEXN2abQLua8u72jpt+4NVVa2+sd0ldD6wBvjW\nfA1EkjQ3y2ZvwnnAjnbHztuAnVX19STPAHcn+QLwBHBna38n8OUk4wx+898IUFVPJ9kJPAMcBa6v\nqtfndziSpK5mDYCqegr42DT155nmLp6q+gFwzQzvdQtwy9y7KUmab34SWJJ6ygCQpJ4yACSppwwA\nSeopA0CSesoAkKSeMgAkqacMAEnqKQNAknrKAJCknjIAJKmnDABJ6ikDQJJ6ygCQpJ7q8jwASVpU\nq2/4xmJ34ceSZwCS1FMGgCT1VJdnAq9K8lCSZ5M8neSXW/3MJLuT7G2vZ7R6ktyeZDzJU0kumvJe\nm1r7vUk2zXRMSdLC63IGcBT4tar6EHApcH2SC4AbgD1VtQbY09YBLmfwwPc1wBbgDhgEBrAVuITB\noyS3HgsNSdLwzRoAVXWgqr7dlv8v8CywAtgA7GjNdgBXteUNwF018AiwPMl5wGXA7qo6VFWHgd3A\n+nkdjSSpszldA0iymsED4h8Fzq2qAzAICeCc1mwFsG/KbhOtNlNdkrQIOgdAkp8C/hPwK1X1/RM1\nnaZWJ6gff5wtScaSjE1OTnbtniRpjjp9DiDJ2xn84/+7VfX7rfxykvOq6kCb4jnY6hPAqim7rwT2\nt/onj6s/fPyxqmobsA1gdHT0LQGxFCzWPcsv3vqZRTmupKWpy11AAe4Enq2qfzdl0y7g2J08m4D7\nptSvbXcDXQocaVNEDwDrkpzRLv6uazVJ0iLocgbwCeBvA3+U5MlW+yfArcDOJJuBl4Br2rb7gSuA\nceBV4DqAqjqU5Gbgsdbupqo6NC+jkCTN2awBUFX/g+nn7wHWTtO+gOtneK/twPa5dFCStDD8JLAk\n9ZQBIEk9ZQBIUk8ZAJLUUwaAJPWUASBJPWUASFJPGQCS1FMGgCT1lAEgST1lAEhSTxkAktRTBoAk\n9ZQBIEk9ZQBIUk91eiSkJMHiPe5UC8MzAEnqqVnPAJJsBz4LHKyqj7TamcA9wGrgReDnqupwe37w\nbzB4JOSrwOer6tttn03AP2tv+4Wq2jG/Q9Fi/nbmA+mlpafLGcDvAOuPq90A7KmqNcCetg5wObCm\n/WwB7oA3AmMrcAlwMbC1PRhekrRIZg2AqvomcPzD2zcAx36D3wFcNaV+Vw08AixPch5wGbC7qg5V\n1WFgN28NFUnSEJ3sReBzq+oAQFUdSHJOq68A9k1pN9FqM9UlzZEXYjVf5vsicKap1Qnqb32DZEuS\nsSRjk5OT89o5SdIPnWwAvNymdmivB1t9Alg1pd1KYP8J6m9RVduqarSqRkdGRk6ye5Kk2ZzsFNAu\nYBNwa3u9b0r9F5PczeCC75E2RfQA8C+nXPhdB9x48t3Wj5rFmpbw7iPp5HW5DfQrwCeBs5NMMLib\n51ZgZ5LNwEvANa35/QxuAR1ncBvodQBVdSjJzcBjrd1NVXX8hWVpSXnk+e+x0fl4LWGzBkBVfW6G\nTWunaVvA9TO8z3Zg+5x6J81isc487n7/9xbluNJ88pPAktRTBoAk9ZQBIEk9ZQBIUk8ZAJLUUwaA\nJPWUASBJPWUASFJPGQCS1FMGgCT1lAEgST1lAEhSTxkAktRTBoAk9ZQBIEk9ZQBIUk8ZAJLUU0MP\ngCTrkzyXZDzJDcM+viRpYKgBkOQ04DeBy4ELgM8luWCYfZAkDQz7DOBiYLyqnq+qPwfuBjYMuQ+S\nJIYfACuAfVPWJ1pNkjRky4Z8vExTqzc1SLYAW9rqnyV57hSOdzbwp6ew/1LTt/HCIo35428sfXbY\nhwb/nHshXzylMf+VLo2GHQATwKop6yuB/VMbVNU2YNt8HCzJWFWNzsd7LQV9Gy845r5wzAtj2FNA\njwFrkpyf5HRgI7BryH2QJDHkM4CqOprkF4EHgNOA7VX19DD7IEkaGPYUEFV1P3D/kA43L1NJS0jf\nxguOuS8c8wJIVc3eSpL0Y8evgpCknlryATDbV0skeUeSe9r2R5OsHn4v51eHMf+DJM8keSrJniSd\nbgn7Udb1K0SSXJ2kkiz5O0a6jDnJz7U/66eT/N6w+zjfOvzdfl+Sh5I80f5+X7EY/ZwvSbYnOZjk\nOzNsT5Lb23+Pp5JcNK8dqKol+8PgQvJ3gfcDpwN/CFxwXJu/B/xWW94I3LPY/R7CmH8WeFdb/oU+\njLm1ezfwTeARYHSx+z2EP+c1wBPAGW39nMXu9xDGvA34hbZ8AfDiYvf7FMf8M8BFwHdm2H4F8F8Y\nfIbqUuDR+Tz+Uj8D6PLVEhuAHW35XmBtkuk+kLZUzDrmqnqoql5tq48w+LzFUtb1K0RuBv4V8INh\ndm6BdBnz3wF+s6oOA1TVwSH3cb51GXMB72nL7+W4zxEtNVX1TeDQCZpsAO6qgUeA5UnOm6/jL/UA\n6PLVEm+0qaqjwBHgrKH0bmHM9es0NjP4DWIpm3XMST4GrKqqrw+zYwuoy5/zTwM/neR/Jnkkyfqh\n9W5hdBnzPwd+PskEg7sJf2k4XVs0C/r1OUO/DXSezfrVEh3bLCWdx5Pk54FR4G8saI8W3gnHnORt\nwG3A54fVoSHo8ue8jME00CcZnOX99yQfqapXFrhvC6XLmD8H/E5V/dskHwe+3Mb8lwvfvUWxoP9+\nLfUzgFm/WmJqmyTLGJw2nuiU60ddlzGT5NPAPwWurKrXhtS3hTLbmN8NfAR4OMmLDOZKdy3xC8Fd\n/27fV1V/UVUvAM8xCISlqsuYNwM7AarqfwHvZPA9QT+uOv3/frKWegB0+WqJXcCmtnw18GC1qytL\n1KxjbtMh/57BP/5LfV4YZhlzVR2pqrOranVVrWZw3ePKqhpbnO7Oiy5/t/8zgwv+JDmbwZTQ80Pt\n5fzqMuaXgLUAST7EIAAmh9rL4doFXNvuBroUOFJVB+brzZf0FFDN8NUSSW4CxqpqF3Ang9PEcQa/\n+W9cvB6fuo5j/tfATwFfbde7X6qqKxet06eo45h/rHQc8wPAuiTPAK8D/7Cqvrd4vT41Hcf8a8B/\nSPKrDKZCPr+Uf6FL8hUGU3hnt+saW4G3A1TVbzG4znEFMA68Clw3r8dfwv/tJEmnYKlPAUmSTpIB\nIEk9ZQBIUk8ZAJLUUwaAJPWUASBJPWUASFJPGQCS1FP/HzDTXo9aBGoAAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7fc3b060c710>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.hist(y_hat)\n",
    "_ = plt.axvline(x=0.5, color='orange')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "pct_auc = roc_auc_score(y_valid, y_hat)*100.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'92.88'"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\"{:0.2f}\".format(pct_auc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "float_y_hat = []\n",
    "for y in y_hat:\n",
    "    float_y_hat.append(y[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "ydf = pd.DataFrame(list(zip(float_y_hat, y_valid)), columns=['y_hat', 'y'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>y_hat</th>\n",
       "      <th>y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.885486</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.094784</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.825777</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.882278</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.316841</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.546900</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0.041849</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>0.047915</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>0.955369</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>0.541998</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      y_hat  y\n",
       "0  0.885486  1\n",
       "1  0.094784  1\n",
       "2  0.825777  1\n",
       "3  0.882278  1\n",
       "4  0.316841  1\n",
       "5  0.546900  0\n",
       "6  0.041849  0\n",
       "7  0.047915  0\n",
       "8  0.955369  1\n",
       "9  0.541998  1"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ydf.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'START how his charter evolved as both man and ape was outstanding not to mention the scenery of the film christopher lambert was astonishing as lord of greystoke christopher is the soul to this masterpiece i became so with his performance i could feel my heart pounding the of the movie still moves me to this day his portrayal of john was oscar worthy as he should have been nominated for it'"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "' '.join(index_word[id] for id in all_x_valid[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"START this movie is horrible you won't believe this hunk of junk is even a movie was better then this and was pretty frigging bad too a bunch of stupid teens crash in a desert find an old run down bungalow and end up fending off horrifically badly stop motion animated spiders pardon my french but the acting was bad as hell the person who wrote this probably didn't even know what a spider is because he had the spiders living in a colony serving an alien queen ripoff queen spider spiders do not live in colonies this movie is a piece of crud at the end the marines suddenly pop out of no where and kill all the spider without even being called if you see a copy of this movie at a video store it in gasoline and throw a match at it\""
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "' '.join(index_word[id] for id in all_x_valid[6]) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>y_hat</th>\n",
       "      <th>y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>0.957828</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>143</th>\n",
       "      <td>0.961272</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150</th>\n",
       "      <td>0.933923</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>204</th>\n",
       "      <td>0.938723</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>258</th>\n",
       "      <td>0.921509</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>283</th>\n",
       "      <td>0.902109</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>304</th>\n",
       "      <td>0.943218</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>489</th>\n",
       "      <td>0.984264</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>542</th>\n",
       "      <td>0.913043</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>644</th>\n",
       "      <td>0.936996</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        y_hat  y\n",
       "25   0.957828  0\n",
       "143  0.961272  0\n",
       "150  0.933923  0\n",
       "204  0.938723  0\n",
       "258  0.921509  0\n",
       "283  0.902109  0\n",
       "304  0.943218  0\n",
       "489  0.984264  0\n",
       "542  0.913043  0\n",
       "644  0.936996  0"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ydf[(ydf.y == 0) & (ydf.y_hat > 0.9)].head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"START low budget but still creepy enough to hold your interest in another take off on the familiar frankenstein story this movie is also known as lady frankenstein the alluring frankenstein sara bay fresh from medical school arrives at her father's estate to find that he is still up to his old tricks baron frankenstein joseph cotten is murdered by his own creation and now his daughter decides to carry on the family tradition by creating herself a lover this is closer to being an eerie melodrama than horror flick supporting cast features mickey hargitay paul paul muller and herbert a rainy night could amplify the atmosphere still a fun watch\""
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "' '.join(index_word[id] for id in all_x_valid[489]) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>y_hat</th>\n",
       "      <th>y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.094784</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>101</th>\n",
       "      <td>0.093522</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>151</th>\n",
       "      <td>0.092449</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>239</th>\n",
       "      <td>0.043815</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>426</th>\n",
       "      <td>0.051367</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>543</th>\n",
       "      <td>0.067008</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>732</th>\n",
       "      <td>0.054247</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>926</th>\n",
       "      <td>0.081515</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>927</th>\n",
       "      <td>0.049135</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>951</th>\n",
       "      <td>0.036430</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        y_hat  y\n",
       "1    0.094784  1\n",
       "101  0.093522  1\n",
       "151  0.092449  1\n",
       "239  0.043815  1\n",
       "426  0.051367  1\n",
       "543  0.067008  1\n",
       "732  0.054247  1\n",
       "926  0.081515  1\n",
       "927  0.049135  1\n",
       "951  0.036430  1"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ydf[(ydf.y == 1) & (ydf.y_hat < 0.1)].head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"START i saw it in europe plex great movie br br this film is an exploration of the spirit and the flesh in modern times protagonist jim kirk drives an unwieldy rv across america stopping often to fill his gas guzzling tank he is middle aged and confused he fuels his thick diabetic body with cups of coffee and radio chatter he is the flesh agitated and sometimes spaced out fairly oblivious to the growing tension around him but feeling it as of discomfort br br the spirit the film through speeches and other sounds as well as what appears and goes by in the visual field the spirit eventually collides with the flesh and kirk goes down unable to comprehend what has happened to him he's been in denial about just how bad things have become due to he of all of us because we are all focused on the needs and desires of our flesh we're all in the same denial and so we like kirk are in danger of going down and being blown away by desert sands just like him\""
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "' '.join(index_word[id] for id in all_x_valid[927]) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
